#INTRO: LOADING PACKAGES ETC----
# Start from a clean workspace: remove every object that ls() reports in the
# current environment before anything below is (re)created.
rm(list = ls())
# Write a form-feed character to the console (presumably to clear the RStudio
# console -- it has no effect on the analysis itself).
cat("\014")
library(foreign)
library(reshape2)
library(data.table)
library(stargazer)
library(MatchIt)
library(ggplot2)
library(gmodels)
library(gridExtra)
library(haven)
library(plyr)
library(tidyr)
library(reshape)
library(countrycode)
library(readr)
library(gnm)
library(MNP)
library(nls2)
library(nlstools)
library(stats)
library(dplyr)
library(matrixStats)
library(miceadds)
library(minpack.lm)
library(lfe)
library(lubridate)
library(AER)
library(reshape)
library(reshape2)
library(tictoc)
library(xtable)
library(Formula)
library(mlogit)
library(plm)
library(survival)
library(plotly)
library(alpaca)
library(lmtest)
library(sandwich)
library(fuzzyjoin)
library(readxl)


#READ DATA:----

#read Thomson data:
# NOTE: this working directory is machine-specific. Every relative path used
# afterwards (input files and the exported .tex tables) resolves against it.
setwd("/Users/alessioalbarello/Documents/PhD/_IIYP/P24_/_dThomson")
dT <- read_dta("AJPS_CPPG_pledges_10April2017.dta")

# Subset the data for the regressions: keep coalitions, governing parties,
# pledges modifying the status quo (sq == 0), and pledges with weights.
# (Excluding pledges w/o weights only excludes Italian coalitions which are
# actually single-coalition governments, so they would be excluded anyway
# (#801, 802).)
# which() is used on purpose: if any of the conditions evaluates to NA, plain
# logical indexing would insert an all-NA row instead of dropping the record.
dT <- dT[which(
  dT$coalition == 1 &
    dT$govparty == 1 &
    dT$sq == 0 &
    !is.na(dT$sampwtmod4021)
), ]

# NB: no need to filter for excllink because it is always NA in the subset.
# dT is already the regression subset here, so it is summarised directly.
summary(dT$excllink)

#read CMP data:
# Builds dC with three columns (year, partyid, SeatSh) from the CMP workbook.
dC <- local({
  cmp <- read_excel("MPDataset_MPDS2022a.xlsx")

  # Ireland 1982 has two elections. Drop the 198202 one, which is not used
  # later, because the merge below is done by year.
  unused_irish_election <- cmp$countryname == "Ireland" & cmp$date == 198202
  cmp <- cmp[!unused_irish_election, ]

  # The first four characters of `date` are taken as the election year.
  cmp$year <- as.numeric(substr(cmp$date, 1, 4))

  # Use the same key name as the Thomson data.
  names(cmp)[names(cmp) == "party"] <- "partyid"

  # Seat share = seats won by the party / total seats.
  cmp$SeatSh <- cmp$absseat / cmp$totseats

  cmp[, c("year", "partyid", "SeatSh")]
})


#CREATE DATASET FOR ANALYSIS:----

##coding partyid to merge CMP and Thomson data:
# Print the codes of the Thomson parties that need a CMP counterpart for the
# seat share merge:
with(
  dT,
  sort(unique(partyid[coalition == 1 & govparty == 1 & sq == 0 & !is.na(sampwtmod4021)]))
)
#22 parties to code: 301  302  303  304  401  402  403  404  408  503  510  701  702  703  704 1004 1006 1009 1012 1201 1202 1203

# Replace the CMP party codes by the corresponding Thomson codes. CMP codes
# that are not listed in the table are left untouched.
dC$partyid <- local({
  # One row per party: CMP code, Thomson code.
  code_map <- matrix(
    c(
      22320, 301,
      22330, 302,
      22521, 303,
      22420, 304,

      53620, 401,
      53520, 402,
      53320, 403,
      53420, 404,
      53110, 408,

      11810, 503,
      # 510, The Alliance (Sweden), is not present in CMP. It is excluded
      # later anyway because not all governing parties are coded.

      41320, 701,
      41521, 702,
      41420, 703,
      41113, 704,

      # 80228 -> 1004 is left out: same Thomson code as the next row, and it
      # refers to a one-party government that is excluded later anyway.
      80221, 1004,
      80951, 1006,
      80902, 1009,
      80811, 1012,

      42520, 1201,
      42420, 1202,
      42320, 1203
    ),
    ncol = 2, byrow = TRUE,
    dimnames = list(NULL, c("cmp", "thomson"))
  )

  ids <- dC$partyid
  row_in_map <- match(ids, code_map[, "cmp"])
  is_mapped <- !is.na(row_in_map)
  ids[is_mapped] <- code_map[row_in_map[is_mapped], "thomson"]
  ids
})


##merge CMP and Thomson:
# Inner join on election year and (recoded) party id: only pledges whose party
# has a seat share in dC survive.
d <- merge(x = dT, y = dC, by = c("year", "partyid"))

# Diagnostics: maximum of pledgecoagree per country / year / party, and the
# column names of the merged data.
aggregate(
  x = d[["pledgecoagree"]],
  by = list(d$country, d$year, d$partyid),
  FUN = max
)
colnames(d)


##create variables for analysis:
# Party-level outcomes, computed within each (country, year, partyid) group:
#   PledgeSh = mean of fulfil2, PledgeNu = sum of fulfil2.
# The data are then collapsed to one row per group. PledgeSh is constant
# within a group, so row_number(PledgeSh) == 1 selects the group's first row
# (a group whose PledgeSh is NA gets an NA rank and is dropped by filter()).
# The dplyr verbs are namespaced explicitly because plyr is attached as well
# and exports functions with the same names; the result is ungrouped so that
# no grouping silently leaks into later steps.
d <- d %>%
  dplyr::group_by(country, year, partyid) %>%
  dplyr::mutate(
    PledgeSh = mean(fulfil2),
    PledgeNu = sum(fulfil2)
  ) %>%
  dplyr::filter(dplyr::row_number(PledgeSh) == 1) %>%
  dplyr::ungroup()

# Keep only the variables of interest.
# NOTE(review): the selection is positional, so it depends on the exact column
# order of the merged data (including SeatSh, PledgeSh and PledgeNu appended
# above) -- re-check the indices if the input files change.
d <- d[, c(4, 1, 2, 15, 31, 16, 77, 78, 79, 53, 57, 51, 54, 55)]
d <- d[order(d$country, d$year, d$partyid), ]

# Government-level totals per (country, year) and each party's share of them:
#   *T  = total over the parties of the government,
#   *C  = the party's value divided by that total.
d <- d %>%
  dplyr::group_by(country, year) %>%
  dplyr::mutate(
    SeatShCT = sum(SeatSh),
    SeatShC = SeatSh / SeatShCT,
    PledgeShT = sum(PledgeSh),
    PledgeShC = PledgeSh / PledgeShT,
    PledgeNuT = sum(PledgeNu),
    PledgeNuC = PledgeNu / PledgeNuT
  ) %>%
  dplyr::ungroup()


##excluding governments for which not all governing parties are coded:
#(Nl 1989 and Ie 2002 are also excluded because not all policy areas are coded)
# Each pair below is (country code, year); a row is dropped when it matches
# any of the pairs.
d <- d[!Reduce(`|`, lapply(
  list(
    c(3, 1989), c(4, 2002),
    c(3, 1986), c(5, 2006), c(10, 1994), c(10, 2005)
  ),
  function(pair) d$country == pair[1] & d$year == pair[2]
)), ]


#Table 4:----
# OLS of the party's share of fulfilled-pledge rates (PledgeShC) on its seat
# share within the government, adding controls step by step.
M1 <- lm(formula = PledgeShC ~ SeatShC, data = d)
M2 <- lm(formula = PledgeShC ~ SeatShC + chex + degomedlogrile, data = d)
M3 <- lm(formula = PledgeShC ~ SeatShC + chex + degomedlogrile + ministry, data = d)

# Same three specifications with the share of fulfilled-pledge counts
# (PledgeNuC) as the dependent variable.
M4 <- lm(formula = PledgeNuC ~ SeatShC, data = d)
M5 <- lm(formula = PledgeNuC ~ SeatShC + chex + degomedlogrile, data = d)
M6 <- lm(formula = PledgeNuC ~ SeatShC + chex + degomedlogrile + ministry, data = d)

# Export the six models as a LaTeX table (written to the working directory).
stargazer(
  M1, M2, M3, M4, M5, M6,
  covariate.labels = c(
    "Seat Share",
    "Formateur",
    "Abs Distance to MLP",
    "Relevant Portfolio",
    "Constant"
  ),
  type = "latex",
  out = "Table4.tex"
)

# p-values reported in the table for the seat share coefficient (position 2,
# right after the intercept), under the nulls beta = 0 and beta = 1.

# Two-sided t-test p-value of H0: coefficient at position `pos` equals 1.
# Defined here so that this section can be run on its own.
p_value_beta_is_one <- function(model, pos) {
  estimate <- model$coefficients[pos]
  std_error <- sqrt(diag(vcov(model)))[pos]
  2 * pt(abs((estimate - 1) / std_error), model$df.residual, lower.tail = FALSE)
}

# H0: beta = 0 (the p-value column of the regression summary).
coef(summary(M1))[2, "Pr(>|t|)"]
coef(summary(M2))[2, "Pr(>|t|)"]
coef(summary(M3))[2, "Pr(>|t|)"]
coef(summary(M4))[2, "Pr(>|t|)"]
coef(summary(M5))[2, "Pr(>|t|)"]
coef(summary(M6))[2, "Pr(>|t|)"]

# H0: beta = 1.
p_value_beta_is_one(M1, 2)
p_value_beta_is_one(M2, 2)
p_value_beta_is_one(M3, 2)
p_value_beta_is_one(M4, 2)
p_value_beta_is_one(M5, 2)
p_value_beta_is_one(M6, 2)


#Table A.12:----
# Same specifications as the main table, with one dummy per country and no
# common intercept (the "- 1" term), i.e. country fixed effects.
M1FE <- lm(formula = PledgeShC ~ SeatShC + factor(country) - 1, data = d)
M2FE <- lm(formula = PledgeShC ~ SeatShC + chex + degomedlogrile + factor(country) - 1, data = d)
M3FE <- lm(formula = PledgeShC ~ SeatShC + chex + degomedlogrile + ministry + factor(country) - 1, data = d)

M4FE <- lm(formula = PledgeNuC ~ SeatShC + factor(country) - 1, data = d)
M5FE <- lm(formula = PledgeNuC ~ SeatShC + chex + degomedlogrile + factor(country) - 1, data = d)
M6FE <- lm(formula = PledgeNuC ~ SeatShC + chex + degomedlogrile + ministry + factor(country) - 1, data = d)

# Export the six fixed-effects models as a LaTeX table (written to the
# working directory). The last five labels name the country dummies.
stargazer(
  M1FE, M2FE, M3FE, M4FE, M5FE, M6FE,
  covariate.labels = c(
    "Seat Share",
    "Formateur",
    "Abs Distance to MLP",
    "Relevant Portfolio",
    "Netherlands",
    "Ireland",
    "Germany",
    "Bulgaria",
    "Austria"
  ),
  type = "latex",
  out = "TableA.12.tex"
)

# p-values reported in the table for the seat share coefficient, under the
# nulls beta = 0 and beta = 1. The fixed-effects models have no intercept, so
# the seat share coefficient is at position 1.

# Two-sided t-test p-value of H0: coefficient at position `pos` equals 1.
# Defined here so that this section can be run on its own.
p_value_beta_is_one <- function(model, pos) {
  estimate <- model$coefficients[pos]
  std_error <- sqrt(diag(vcov(model)))[pos]
  2 * pt(abs((estimate - 1) / std_error), model$df.residual, lower.tail = FALSE)
}

# H0: beta = 0 (the p-value column of the regression summary).
coef(summary(M1FE))[1, "Pr(>|t|)"]
coef(summary(M2FE))[1, "Pr(>|t|)"]
coef(summary(M3FE))[1, "Pr(>|t|)"]
coef(summary(M4FE))[1, "Pr(>|t|)"]
coef(summary(M5FE))[1, "Pr(>|t|)"]
coef(summary(M6FE))[1, "Pr(>|t|)"]

# H0: beta = 1.
p_value_beta_is_one(M1FE, 1)
p_value_beta_is_one(M2FE, 1)
p_value_beta_is_one(M3FE, 1)
p_value_beta_is_one(M4FE, 1)
p_value_beta_is_one(M5FE, 1)
p_value_beta_is_one(M6FE, 1)

